//
// Project: Disagreement in science: Missing women



clear all
version 15.1



//
// locals shared by every journal section below

// name of the variable that codes an author as female
local female "female_genderize"

// condition that is true when that variable is non-missing, i.e. gender is known
local known_gender "`female'!=."





//
// AER: PUBLICATION COUNTS TAKEN OVER ALL AEA JOURNALS

// load the AER author-article data
use "${data}/output/aer_data_gender.dta", clear
drop if year==2020
drop if month=="May" & year!=2019  // exclude AEA papers and proceedings
keep if (comment | research_article)

// stack observations from all AEA journals so the publication count captures seniority
append using "${data}/output/all_aea_data_nogender.dta"
keep if journal=="American Economic Review" ///
	| inlist(type, "JOURNAL ARTICLE", "PAPERS", "REGULAR ARTICLE", ///
		"REGULAR ARTICLES", "Regular papers", "SHORTER PAPERS")

// running publication count per author: _n is the row's position within the
// author's records sorted by article_id_chronological, so it includes the current article
bysort full_name (article_id_chronological): generate previous_pubs = _n

// from here on only AER observations are needed
keep if journal=="American Economic Review"

// 1 if the paper has a non-empty call_by, i.e. it received a comment
generate commented = call_by!=""

// flag the author whose author_id is the largest within the article
// NOTE(review): assumes author_id increases with byline position - confirm
egen last_author_id = max(author_id), by(article_id)
generate last_author = author_id==last_author_id

// seniority of the paper each comment points to (call_to holds the target's article_id)
bysort article_id: egen avg_seniority_own = mean(previous_pubs)
generate avg_seniority_target = .
generate seniority_last_author_target = .
levelsof call_to, local(levels)
foreach target_id of local levels {
	// mean publication count over all authors of the target paper
	summarize avg_seniority_own if article_id==`target_id', meanonly
	local share = r(mean)
	replace avg_seniority_target = `share' if call_to==`target_id'

	// publication count of the target paper's last author
	summarize previous_pubs if article_id==`target_id' & last_author==1, meanonly
	local pubs = r(mean)
	replace seniority_last_author_target = `pubs' if call_to==`target_id'
}


// analysis (comments only); r(table) and the margins are stored as m`i' and a`i'

// model 1: average seniority of all authors on target paper, with year fixed effects
local i = 1
local coefficients`i' "2"
regress avg_seniority_target i.`female' i.year if comment==1, vce(robust)
matrix m`i' = r(table)
margins `female'
matrix a`i' = r(b)

// model 2: seniority of last author on target paper
// year fixed effects excluded because of small sample
local i = 2
local coefficients`i' "2"
regress seniority_last_author_target i.`female' if comment==1, vce(robust)
matrix m`i' = r(table)
margins `female'
matrix a`i' = r(b)




//
// ASR

// load the ASR author-article data; keep comments and research articles
use "${data}/output/asr_data_gender.dta", clear
keep if (comment | research_article)

// running publication count per author: _n is the row's position within the
// author's records sorted by article_id, so it includes the current article
bysort full_name (article_id): generate previous_pubs = _n

// 1 if the paper has a non-missing call_by, i.e. it received a comment
generate commented = call_by!=.

// flag the author whose author_id is the largest within the article
// NOTE(review): assumes author_id increases with byline position - confirm
egen last_author_id = max(author_id), by(article_id)
generate last_author = author_id==last_author_id

// seniority of the paper each comment points to (call_to holds the target's article_id)
bysort article_id: egen avg_seniority_own = mean(previous_pubs)
generate avg_seniority_target = .
generate seniority_last_author_target = .
levelsof call_to, local(levels)
foreach target_id of local levels {
	// mean publication count over all authors of the target paper
	summarize avg_seniority_own if article_id==`target_id', meanonly
	local share = r(mean)
	replace avg_seniority_target = `share' if call_to==`target_id'

	// publication count of the target paper's last author
	summarize previous_pubs if article_id==`target_id' & last_author==1, meanonly
	local pubs = r(mean)
	replace seniority_last_author_target = `pubs' if call_to==`target_id'
}


// analysis (comments only); r(table) and the margins are stored as m`i' and a`i'

// model 3: average seniority of all authors on target paper, with year fixed effects
local i = 3
local coefficients`i' "2"
regress avg_seniority_target i.`female' i.year if comment==1, vce(robust)
matrix m`i' = r(table)
margins `female'
matrix a`i' = r(b)

// model 4: seniority of last author on target paper
// year fixed effects excluded because of small sample
local i = 4
local coefficients`i' "2"
regress seniority_last_author_target i.`female' if comment==1, vce(robust)
matrix m`i' = r(table)
margins `female'
matrix a`i' = r(b)




//
// Nature

// load the Nature author-article data; keep comments and research articles
use "${data}/output/nature_data_gender.dta", clear
drop if year==2020
keep if comment | research_article

// running publication count per author: _n is the row's position within the
// author's records sorted by article_id, so it includes the current article
bysort full_name (article_id): generate previous_pubs = _n

// 1 if the paper has a non-missing call_by, i.e. it received a comment
generate commented = call_by!=.

// flag the author whose author_id is the largest within the article
// NOTE(review): assumes author_id increases with byline position - confirm
egen last_author_id = max(author_id), by(article_id)
generate last_author = author_id==last_author_id

// seniority of the paper each comment points to (call_to holds the target's article_id)
bysort article_id: egen avg_seniority_own = mean(previous_pubs)
generate avg_seniority_target = .
generate seniority_last_author_target = .
levelsof call_to, local(levels)
foreach target_id of local levels {
	// mean publication count over all authors of the target paper
	summarize avg_seniority_own if article_id==`target_id', meanonly
	local share = r(mean)
	replace avg_seniority_target = `share' if call_to==`target_id'

	// publication count of the target paper's last author
	summarize previous_pubs if article_id==`target_id' & last_author==1, meanonly
	local pubs = r(mean)
	replace seniority_last_author_target = `pubs' if call_to==`target_id'
}


// analysis (comments only), both models with year fixed effects:
//   model 5: average seniority of all authors on target paper
//   model 6: seniority of last author on target paper
// i is advanced at the top of each pass, so it equals 6 once the loop is done
local i = 4
foreach outcome in avg_seniority_target seniority_last_author_target {
	local ++i
	local coefficients`i' "2"
	regress `outcome' i.`female' i.year if comment==1, vce(robust)
	matrix m`i' = r(table)
	margins `female'
	matrix a`i' = r(b)
}




//
// PNAS

// load the PNAS author-article data
use "${data}/output/pnas_data_gender.dta", clear
// name suffixes erroneously scraped as separate author-article observations
drop if inlist(full_name, "II", "III", "IV", "Jr", "Jr.")
drop if year==2020
keep if comment | research_article

// running publication count per author: _n is the row's position within the
// author's records sorted by article_id, so it includes the current article
bysort full_name (article_id): generate previous_pubs = _n

// PNAS started comments in 2008: restrict to years with comments, but only
// after the publication count above has used the earlier years
drop if year<2008

// 1 if the paper has a non-missing call_by, i.e. it received a comment
generate commented = call_by!=.

// flag the author whose author_id is the largest within the article
// NOTE(review): assumes author_id increases with byline position - confirm
egen last_author_id = max(author_id), by(article_id)
generate last_author = author_id==last_author_id

// seniority of the paper each comment points to (call_to holds the target's article_id)
bysort article_id: egen avg_seniority_own = mean(previous_pubs)
generate avg_seniority_target = .
generate seniority_last_author_target = .
levelsof call_to, local(levels)
foreach target_id of local levels {
	// mean publication count over all authors of the target paper
	summarize avg_seniority_own if article_id==`target_id', meanonly
	local share = r(mean)
	replace avg_seniority_target = `share' if call_to==`target_id'

	// publication count of the target paper's last author
	summarize previous_pubs if article_id==`target_id' & last_author==1, meanonly
	local pubs = r(mean)
	replace seniority_last_author_target = `pubs' if call_to==`target_id'
}

// analysis (comments only), both models with year fixed effects:
//   model 7: average seniority of all authors on target paper
//   model 8: seniority of last author on target paper
// i is advanced at the top of each pass, so it equals 8 once the loop is done
local i = 6
foreach outcome in avg_seniority_target seniority_last_author_target {
	local ++i
	local coefficients`i' "2"
	regress `outcome' i.`female' i.year if comment==1, vce(robust)
	matrix m`i' = r(table)
	margins `female'
	matrix a`i' = r(b)
}





//
// Science

// load the Science author-article data; keep comments and research articles
use "${data}/output/science_data_gender.dta", clear
drop if year==2020
keep if comment | research_article

// running publication count per author: _n is the row's position within the
// author's records sorted by article_id, so it includes the current article
bysort full_name (article_id): generate previous_pubs = _n

// flag the author whose author_id is the largest within the article
// NOTE(review): assumes author_id increases with byline position - confirm
egen last_author_id = max(author_id), by(article_id)
generate last_author = author_id==last_author_id

// 1 if the paper has a non-missing call_by_1, i.e. it received a comment
generate commented = call_by_1!=.

// mean publication count over the authors of each paper
bysort article_id: egen avg_seniority_own = mean(previous_pubs)

// build call_to on the commenting paper from call_by_1 on the commented papers:
// each level of call_by_1 is the article_id of a comment
generate num_targets = .
levelsof call_by_1, local(levels)
foreach comment_id of local levels {
	// number of distinct papers that list this comment in call_by_1
	tabulate article_id if call_by_1==`comment_id'
	local num_targets = r(r)
	replace num_targets = `num_targets' if article_id==`comment_id'

	// with exactly one target, its article_id is the mean; otherwise leave
	// call_to missing so the multi-target cases can be entered by hand below
	if `num_targets'==1 {
		summarize article_id if call_by_1==`comment_id'
		local target = r(mean)
	}
	else {
		local target = .
	}
	replace call_to = `target' if article_id==`comment_id'
}

* manually enter call_to for comments that target more than 1 paper
generate call_to_2 = .
generate call_to_3 = .

* comment 8095
replace call_to   = 7015 if article_id==8095
replace call_to_2 = 7022 if article_id==8095

* comment 10155
replace call_to   = 2156 if article_id==10155
replace call_to_2 = 7722 if article_id==10155

* comment 12148
replace call_to   = 10952 if article_id==12148
replace call_to_2 = 10953 if article_id==12148

* comment 17888
replace call_to   = 13062 if article_id==17888
replace call_to_2 = 16640 if article_id==17888

* comment 23897 (three targets)
replace call_to   = 22086 if article_id==23897
replace call_to_2 = 22087 if article_id==23897
replace call_to_3 = 22088 if article_id==23897

* comment 32659 (three targets)
replace call_to   = 31074 if article_id==32659
replace call_to_2 = 31075 if article_id==32659
replace call_to_3 = 31076 if article_id==32659
* call_by_2 ... call_by_7: same pass for each further comment slot
* each level of call_by_k is the article_id of a comment; flag marks comments
* that already had a call_to, and only comments without one get filled in
forvalues k = 2/7 {
	* flag is rebuilt from scratch on every pass (it does not exist before the first)
	if `k' > 2 {
		drop flag
	}
	generate flag = 0
	levelsof call_by_`k', local(levels)
	foreach comment_id of local levels {
		tabulate article_id if call_by_`k'==`comment_id'
		summarize article_id if call_by_`k'==`comment_id'
		local target = r(mean)
		replace flag = 1 if article_id==`comment_id' & call_to!=.
		replace call_to = `target' if article_id==`comment_id' & call_to==.
	}
}

* manually enter call_to_2 for paper that has already a call_to_1
* (touches only call_to_2, which the passes above neither read nor write)
replace call_to_2 = 20768 if article_id==25795

* call_by_8: same pass, plus a variable that records the last target found
* NOTE(review): replace target has no if-condition, so every row is overwritten
* on each iteration; looks like an inspection aid - confirm before relying on it
generate target=.
drop flag
generate flag = 0
levelsof call_by_8, local(levels)
foreach comment_id of local levels {
	tabulate article_id if call_by_8==`comment_id'
	summarize article_id if call_by_8==`comment_id'
	local target = r(mean)
	replace target = `target'
	replace flag = 1 if article_id==`comment_id' & call_to!=.
	replace call_to = `target' if article_id==`comment_id' & call_to==.
}
* seniority of the paper each comment points to (call_to holds the target's article_id)
* NOTE(review): only call_to is used here; call_to_2 and call_to_3 are not - confirm intended
generate avg_seniority_target = .
generate seniority_last_author_target = .
levelsof call_to, local(levels)
foreach target_id of local levels {
	// mean publication count over all authors of the target paper
	summarize avg_seniority_own if article_id==`target_id', meanonly
	local share = r(mean)
	replace avg_seniority_target = `share' if call_to==`target_id'

	// publication count of the target paper's last author
	summarize previous_pubs if article_id==`target_id' & last_author==1, meanonly
	local pubs = r(mean)
	replace seniority_last_author_target = `pubs' if call_to==`target_id'
}

// analysis (comments only), both models with year fixed effects:
//   model 9:  average seniority of all authors on target paper
//   model 10: seniority of last author on target paper
// i is advanced at the top of each pass, so it equals 10 once the loop is done
local i = 8
foreach outcome in avg_seniority_target seniority_last_author_target {
	local ++i
	local coefficients`i' "2"
	regress `outcome' i.`female' i.year if comment==1, vce(robust)
	matrix m`i' = r(table)
	margins `female'
	matrix a`i' = r(b)
}



